{
  "cells": [
    {
      "cell_type": "markdown",
      "id": "cell_0",
      "metadata": {},
      "source": [
        "# ContextGem: StringConcept Extraction with References and Justifications"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "cell_1",
      "metadata": {},
      "outputs": [],
      "source": [
        "%pip install -U contextgem"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "cell_2",
      "metadata": {},
      "source": [
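        "To run the extraction, provide your LLM details in the `DocumentLLM(...)` constructor in the code cell below: either set the `CONTEXTGEM_AZURE_OPENAI_*` environment variables it reads, or replace the arguments with your own model and credentials."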
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "cell_3",
      "metadata": {},
      "outputs": [],
      "source": [
        "# Example: extract financial key figures as a StringConcept, with a justification\n",
        "# and sentence-level source references attached to every extracted item.\n",
        "\n",
        "import os\n",
        "\n",
        "from contextgem import Document, DocumentLLM, StringConcept\n",
        "\n",
        "\n",
        "# Short financial report excerpt used as the input document\n",
        "financial_text = \"\"\"\n",
        "2024 Financial Performance Summary\n",
        "\n",
        "Revenue increased to $120 million in fiscal year 2024, representing 15% growth compared to the previous year. This growth was primarily driven by the expansion of our enterprise client base and the successful launch of our premium service tier.\n",
        "\n",
        "The Board has recommended a dividend of $1.25 per share, which will be payable to shareholders of record as of March 15, 2025.\n",
        "\"\"\"\n",
        "\n",
        "# Describe what to extract; each extracted item carries a justification and\n",
        "# references to the sentences it was drawn from\n",
        "key_figures_concept = StringConcept(\n",
        "    name=\"Financial key figures\",\n",
        "    description=\"Important financial metrics and figures mentioned in the report\",\n",
        "    add_references=True,\n",
        "    reference_depth=\"sentences\",  # sentence-level rather than paragraph-level references\n",
        "    add_justifications=True,\n",
        "    justification_depth=\"balanced\",\n",
        "    justification_max_sents=3,  # cap each justification at 3 sentences\n",
        ")\n",
        "\n",
        "# Wrap the raw text in a Document and register the concept on it\n",
        "doc = Document(raw_text=financial_text)\n",
        "doc.add_concepts([key_figures_concept])\n",
        "\n",
        "# LLM connection details are read from environment variables\n",
        "llm = DocumentLLM(\n",
        "    model=\"azure/gpt-4o-mini\",\n",
        "    api_key=os.getenv(\"CONTEXTGEM_AZURE_OPENAI_API_KEY\"),\n",
        "    api_version=os.getenv(\"CONTEXTGEM_AZURE_OPENAI_API_VERSION\"),\n",
        "    api_base=os.getenv(\"CONTEXTGEM_AZURE_OPENAI_API_BASE\"),\n",
        ")\n",
        "\n",
        "# Run the extraction and take the first (and only) concept attached above\n",
        "extracted_concepts = llm.extract_concepts_from_document(doc)\n",
        "key_figures_concept = extracted_concepts[0]\n",
        "\n",
        "# Report every extracted figure along with its justification and source sentences\n",
        "print(\"Extracted financial key figures:\")\n",
        "for figure in key_figures_concept.extracted_items:\n",
        "    print(f\"\\nFigure: {figure.value}\")\n",
        "    print(f\"Justification: {figure.justification}\")\n",
        "    reference_lines = [f\"- {sentence.raw_text}\" for sentence in figure.reference_sentences]\n",
        "    print(\"Source references:\", *reference_lines, sep=\"\\n\")\n"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.10.0"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}